import os
import numpy as np
from sklearn.impute import SimpleImputer
from data_wash.utils import data_filter

# Current working directory, used as the project root for all paths below.
WORK_PATH = os.getcwd()
# Directory under the project root that holds the data files.
DATA_PATH = os.path.join(WORK_PATH, "data")

# Input table: <DATA_PATH>/GANSU_MONTH_DATA/gansu_data.txt.
# The path components are passed to os.path.join separately rather than
# embedding a backslash in the literal: "\g" is an invalid escape sequence
# and a hard-coded backslash is only a path separator on Windows.
file_object_path = os.path.join(DATA_PATH, 'GANSU_MONTH_DATA', 'gansu_data.txt')
with open(file_object_path, 'r') as file_object:
    lines = file_object.readlines()

# The first line is the table header: remove it from the data rows and keep
# it so it can be written back out in front of the processed data.
DATA_TABLE_HEAD = lines.pop(0)
print('剔除数据表头:')
print(DATA_TABLE_HEAD)

# Split each remaining line on whitespace and run every field through
# data_filter, giving one list of filtered values per input row.
file_data = [
    [data_filter(field) for field in row_text.split()]
    for row_text in lines
]

# Array form of the filtered rows (a 2-D table when all rows have the same
# number of fields).
dataset = np.array(file_data)


# Fill every NaN entry with the mean of its column.
# The arguments are passed by keyword: SimpleImputer's parameters are
# keyword-only in current scikit-learn releases, so the positional form
# SimpleImputer(np.nan, "mean") raises TypeError there.
imputation_transformer = SimpleImputer(missing_values=np.nan, strategy="mean")

# NOTE(review): per the scikit-learn docs, a column that is entirely missing
# is dropped by default with the "mean" strategy, which would leave fewer
# columns than the header names - confirm the input has no such column.
dataset_fitted = imputation_transformer.fit_transform(dataset)


print('【数据打印至文件...... txt 】')
# Path components are joined separately: a backslash inside the literal is an
# invalid escape sequence ("\g") and is only a separator on Windows.
write_file_path = os.path.join(DATA_PATH, 'GANSU_MONTH_DATA', 'gansu_data_interpolated_all.txt')
# The with-block guarantees the output handle is closed even if a write fails.
# The input handle needs no explicit close here; the with-statement that
# opened it has already closed it.
# NOTE(review): 'a+' appends, so re-running the script adds a second header
# and a second copy of the data to the same file - confirm whether 'w'
# (overwrite) is what is actually wanted.
with open(write_file_path, 'a+') as write_file:
    # Header line exactly as it was read from the input file.
    write_file.write(DATA_TABLE_HEAD)
    # One line per row; every value is followed by a single space.
    write_file.writelines(
        ''.join(str(item) + ' ' for item in row) + '\n'
        for row in dataset_fitted
    )
print('数据已输出至：' + write_file_path)


print('【数据打印至文件...... csv 】')
# Path components are joined separately: a backslash inside the literal is an
# invalid escape sequence ("\g") and is only a separator on Windows.
write_file_path = os.path.join(DATA_PATH, 'GANSU_MONTH_DATA', 'gansu_data_interpolated_all.csv')
# The with-block guarantees the output handle is closed even if a write fails.
# The input handle needs no explicit close here; the with-statement that
# opened it has already closed it.
# NOTE(review): 'a+' appends, so re-running the script adds a second header
# and a second copy of the data to the same file - confirm whether 'w'
# (overwrite) is what is actually wanted.
with open(write_file_path, 'a+') as write_file:
    # Fixed column names for the CSV header.
    write_file.write('V01301,V04001,V04002,V11046,V11211_CHAR,V10302,V12012,V10301,V12011,V13305,V10004_701,V11291_701,V12001_701,V13004_701,V13003_701,V12012_701,V12011_701,V13353,V14033,V14032,V11042,V11296_CHAR,V13052,V13007\n')
    # Values are joined with commas and no trailing separator, so each data
    # row has one field per value, in the same form as the header line. A
    # trailing comma would add an extra empty field to every row.
    write_file.writelines(
        ','.join(str(item) for item in row) + '\n'
        for row in dataset_fitted
    )
print('数据已输出至：' + write_file_path)